library(tidyverse)
library(ggrepel) # Mejora la visualización de texto en ggplots evitando
library(gprofiler2) # enriquecimiento grofiler
library(clusterProfiler) # para enriquecimientos
library(enrichplot) # plots de enriquecimiento
library(DOSE) # needed to convert to enrichResult object
library(reactable) # para tablas interactivasumap.data <- data.frame(umap.ad$layout)[,1:2]
colnames(umap.data) <- c("umap1", "umap2")
covs2_df <- covs2.casos %>%
dplyr::select(mrna_id, sampleset_UMAP)
para.plot <- merge(umap.data, covs2_df, by = "row.names")
rownames(para.plot) <- para.plot$Row.names
para.plot$Row.names <- NULL
ggplot(para.plot, aes_string(x = "umap1", y = "umap2", color = "sampleset_UMAP")) +
geom_point(show.legend = TRUE, size = 3) +
geom_text_repel(aes(label=ifelse(mrna_id %in% rownames(outliers.umap1) | mrna_id %in% rownames(outliers.umap2), as.character(gsub("_.*", "", covs2$mrna_id)), "")),
color = "black",
max.overlaps = 30, # Reduce el número máximo de solapamientos
point.padding = unit(0.2, "lines"), # Menos padding alrededor de los puntos
size = 3,
fontface = "bold",
segment.size = 0.2, # Líneas de guía más finas
segment.color = 'grey50',
max.segment.length = unit(0.5, "lines"), # Líneas de guía más cortas
arrow = arrow(length = unit(0.02, "npc"), type = "closed", ends = "last")) +
theme_minimal() +
labs(title = "UMAP",
x = "umap1", y = "umap2") +
theme(
legend.title = element_blank(),
legend.text = element_text(size = 12),
text = element_text(size = 12),
legend.key.size = unit(0.5, "cm")) +
geom_point(data= (covs2 %>% filter(neuroStatus == 0)), aes_string(x = "umap1", y = "umap2", color = "sampleset_UMAP"), alpha = 0.5) estado <- rep(0, nrow(mat_exp_alz_genes))
estado[rownames(mat_exp_alz_genes) %in% mUMAP1.pos] <- 1
# Añadir la nueva columna a la matriz de expresión de genes
mat_exp_alz_genes.RF <- data.frame(cbind(mat_exp_alz_genes, class = estado))
mat_exp_alz_genes.RF$class <- as.factor(mat_exp_alz_genes.RF$class)
mat_exp_alz_genes.RF <- mat_exp_alz_genes.RF[rownames(covs2.casos),]library(caret)
library(randomForest)
library(doParallel)
num_cores <- detectCores() - 2
registerDoParallel(cores=num_cores)Aquí he utilizado el parámetro importance = “permutation”
# Definir una función para calcular la Accuracy balanceada
balanced_accuracy <- function(data, lev = NULL, model = NULL) {
cm <- confusionMatrix(data$pred, data$obs)
sensitivity <- cm$byClass["Sensitivity"]
specificity <- cm$byClass["Specificity"]
balanced_acc <- mean(c(sensitivity, specificity))
c(BalancedAccuracy = balanced_acc)
}
# Definir las métricas de evaluación que queremos utilizar
RFControl <- trainControl(
method = "cv",
number = 10,
repeats = 10,
summaryFunction = balanced_accuracy,
allowParallel = TRUE,
seeds = NULL,
returnResamp = "final",
)
set.seed(1234)
sqr.genes <- round(sqrt(ncol(mat_exp_alz_genes.RF)))
mygrid <- expand.grid(mtry = c(sqr.genes - 2, sqr.genes - 1, sqr.genes, sqr.genes+1, sqr.genes+2),
splitrule = c("gini", "extratrees"),
min.node.size = 1)
trees <- seq(500, 2000, 500)
rf.cv.10 <- list() # lista vacia para meter los resultados de cada bucle
for (tree in trees){ # bucle en cada pasada hace un modelo con un número de arboles distintos
modelo <- train(class ~., data=mat_exp_alz_genes.RF,
method="ranger",
tuneGrid=mygrid,
trControl=RFControl,
ntree = tree,
metric = "BalancedAccuracy",
importance = "permutation" # o impurity permutation https://arxiv.org/abs/1407.7502
)
rf.cv.10[[paste(tree, "trees")]] <- modelo # metemos el modelo en la lista
}# Hacemos dataframe con las métricas de cada modelo
datos_combinados <- rbind(
data.frame(mtry = rf.cv.10$`500 trees`$results$mtry, splitrule = rf.cv.10$`500 trees`$results$splitrule, MetricValue = rf.cv.10$`500 trees`$results$BalancedAccuracy, Trees = "500 trees", Metric = "Balanced Accuracy"),
data.frame(mtry = rf.cv.10$`1000 trees`$results$mtry, splitrule = rf.cv.10$`500 trees`$results$splitrule, MetricValue = rf.cv.10$`1000 trees`$results$BalancedAccuracy, Trees = "1000 trees", Metric = "Balanced Accuracy"),
data.frame(mtry = rf.cv.10$`1500 trees`$results$mtry, splitrule = rf.cv.10$`500 trees`$results$splitrule, MetricValue = rf.cv.10$`1500 trees`$results$BalancedAccuracy, Trees = "1500 trees", Metric = "Balanced Accuracy"),
data.frame(mtry = rf.cv.10$`2000 trees`$results$mtry, splitrule = rf.cv.10$`500 trees`$results$splitrule, MetricValue = rf.cv.10$`2000 trees`$results$BalancedAccuracy, Trees = "2000 trees", Metric = "Balanced Accuracy")
)
datos_combinados$Trees <- factor(datos_combinados$Trees, levels = c("500 trees", "1000 trees", "1500 trees", "2000 trees"))
datos_combinados %>%
# filter(splitrule == "extratrees") %>%
filter(Metric == "Balanced Accuracy") %>%
ggplot(aes(x = mtry, y = MetricValue, color = Trees)) +
geom_point(size = 4) +
geom_line(linetype = "dashed") +
theme_minimal() +
labs(x = "Mtry", y = "Valor de la Métrica", title = "Comparación de Balanced Accuracy con splitrule") +
facet_wrap(~ splitrule) ##
## Call:
## summary.resamples(object = models)
##
## Models: 500 trees, 1000 trees, 1500 trees, 2000 trees
## Number of resamples: 10
##
## BalancedAccuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 500 trees 0.5 0.6875 0.8333333 0.7916667 0.9583333 1 0
## 1000 trees 0.5 0.6875 0.7916667 0.7833333 0.8333333 1 0
## 1500 trees 0.5 0.6875 0.7916667 0.7833333 0.9583333 1 0
## 2000 trees 0.5 0.7500 0.7916667 0.8000000 0.8333333 1 0
medianas_por_clase <- aggregate(. ~ class, data = mat_exp_alz_genes.RF, FUN = median)
medianas_clase_0 <- medianas_por_clase[medianas_por_clase$class == 0, ]
medianas_clase_1 <- medianas_por_clase[medianas_por_clase$class == 1, ]
genes_upregulados <- colnames(mat_exp_alz_genes.RF)[sapply(colnames(mat_exp_alz_genes.RF), function(gene) {
if (gene != "class") {
return(medianas_clase_1[[gene]] > medianas_clase_0[[gene]])
} else {
return(FALSE)
}
})]
genes_downregulados <- colnames(mat_exp_alz_genes.RF)[sapply(colnames(mat_exp_alz_genes.RF), function(gene) {
if (gene != "class") {
return(medianas_clase_1[[gene]] < medianas_clase_0[[gene]])
} else {
return(FALSE)
}
})]important.genes.umap1pos <- varImp(rf.cv.10$`2000 trees`)$importance %>%
rownames_to_column() %>%
arrange(desc(Overall))
reactable(important.genes.umap1pos)important.genes.umap1pos %>%
filter(rowname %in% genes_upregulados) %>%
head(50) %>%
mutate(row_index = row_number(), # Crear una columna numérica para el índice
rowname = forcats::fct_inorder(rowname)) %>%
ggplot(aes(x = row_index, y = Overall)) + # Usar row_index en el eje X
geom_point() +
geom_line() +
labs(x = "Número Genes", title = "Importancia genes sobreexpresados en UMAP1+ en RF") +
theme_bw() +
theme(axis.text.x = element_text(size = 10))important.genes.umap1pos %>%
filter(rowname %in% genes_downregulados) %>%
head(50) %>%
mutate(row_index = row_number(), # Crear una columna numérica para el índice
rowname = forcats::fct_inorder(rowname)) %>%
ggplot(aes(x = row_index, y = Overall)) + # Usar row_index en el eje X
geom_point() +
geom_line() +
labs(x = "Número Genes", title = "Importancia genes infraexpresados en UMAP1+ en RF") +
theme_bw() +
theme(axis.text.x = element_text(size = 10))important.genes.top <- important.genes.umap1pos%>%
head(., 10)
selected_columns <- colnames(mat_exp_alz_genes.RF) %in% c(important.genes.top$rowname, "class")
df <- mat_exp_alz_genes.RF[, selected_columns]
df_long <- df %>%
gather(key = "Gene", value = "Expression", -class)
df_long$Gene <- factor(df_long$Gene, levels = important.genes.top$rowname)
ggplot(df_long, aes(x = class, y = Expression, fill = class)) +
geom_boxplot() +
facet_wrap(~ Gene, scales = "free_y") +
labs(title = "Boxplots Genes", x = "UMAP1pos", y = "Expression", fill = "mUMAP1pos") matriz.AD.minus.important.genes <- mat_exp_alz_genes.RF[,!colnames(mat_exp_alz_genes.RF) %in% important.genes.top$rowname]
gp_up <- gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "GO",
multi_query = T,
highlight = T,
exclude_iea = T)
gostplot(gp_up, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "GO",
highlight = T,
exclude_iea = T
)
multi_gp$result <- multi_gp$result %>%
filter(highlighted == T)
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "GO 10 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "KEGG",
multi_query = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "KEGG")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(generationumerico))
enrichplot::dotplot(gp_mod_enrich, showCategory = 30, x = "Cluster", size = "GeneRatio", title = "KEGG 10 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "REAC",
multi_query = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "REAC")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "REAC 10 genes")important.genes.top <- important.genes.umap1pos%>%
head(., 20)
selected_columns <- colnames(mat_exp_alz_genes.RF) %in% c(important.genes.top$rowname, "class")
df <- mat_exp_alz_genes.RF[, selected_columns]
df_long <- df %>%
gather(key = "Gene", value = "Expression", -class)
df_long$Gene <- factor(df_long$Gene, levels = important.genes.top$rowname)
ggplot(df_long, aes(x = class, y = Expression, fill = class)) +
geom_boxplot() +
facet_wrap(~ Gene, scales = "free_y") +
labs(title = "Boxplots Genes", x = "UMAP1pos", y = "Expression", fill = "mUMAP1pos") matriz.AD.minus.important.genes <- mat_exp_alz_genes.RF[,!colnames(mat_exp_alz_genes.RF) %in% important.genes.top$rowname]
top_genes <- important.genes.top$rowname
gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "GO",
multi_query = T,
highlight = T,
exclude_iea = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "GO",
highlight = T,
exclude_iea = T)
multi_gp$result <- multi_gp$result %>%
filter(highlighted == T)
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "GO 20 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "KEGG",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "KEGG")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(generationumerico))
enrichplot::dotplot(gp_mod_enrich, showCategory = 30, x = "Cluster", size = "GeneRatio", title = "KEGG 20 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "REAC",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "REAC")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "REAC 20 genes")important.genes.top <- important.genes.umap1pos%>%
head(., 30)
selected_columns <- colnames(mat_exp_alz_genes.RF) %in% c(important.genes.top$rowname, "class")
df <- mat_exp_alz_genes.RF[, selected_columns]
df_long <- df %>%
gather(key = "Gene", value = "Expression", -class)
df_long$Gene <- factor(df_long$Gene, levels = important.genes.top$rowname)
ggplot(df_long, aes(x = class, y = Expression, fill = class)) +
geom_boxplot() +
facet_wrap(~ Gene, scales = "free_y") +
labs(title = "Boxplots Genes", x = "UMAP1pos", y = "Expression", fill = "mUMAP1pos") matriz.AD.minus.important.genes <- mat_exp_alz_genes.RF[,!colnames(mat_exp_alz_genes.RF) %in% important.genes.top$rowname]
top_genes <- important.genes.top$rowname
gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "GO",
multi_query = T,
highlight = T,
exclude_iea = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname[important.genes.top$rowname %in% genes_upregulados])
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes)[colnames(matriz.AD.minus.important.genes) %in% genes_upregulados])
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "GO",
highlight = T,
exclude_iea = T)
multi_gp$result <- multi_gp$result %>%
filter(highlighted == T)
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "GO 30 genes upregulated")top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname[important.genes.top$rowname %in% genes_downregulados])
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes)[colnames(matriz.AD.minus.important.genes) %in% genes_downregulados])
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "GO",
highlight = T,
exclude_iea = T)
multi_gp$result <- multi_gp$result %>%
filter(highlighted == T)
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "GO 30 genes downregulated")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "KEGG",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "KEGG")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(generationumerico))
enrichplot::dotplot(gp_mod_enrich, showCategory = 30, x = "Cluster", size = "GeneRatio", title = "KEGG 30 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "REAC",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "REAC")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "REAC 30 genes")important.genes.top <- important.genes.umap1pos%>%
head(., 40)
selected_columns <- colnames(mat_exp_alz_genes.RF) %in% c(important.genes.top$rowname, "class")
df <- mat_exp_alz_genes.RF[, selected_columns]
df_long <- df %>%
gather(key = "Gene", value = "Expression", -class)
df_long$Gene <- factor(df_long$Gene, levels = important.genes.top$rowname)
ggplot(df_long, aes(x = class, y = Expression, fill = class)) +
geom_boxplot() +
facet_wrap(~ Gene, scales = "free_y") +
labs(title = "Boxplots Genes", x = "UMAP1pos", y = "Expression", fill = "mUMAP1pos") matriz.AD.minus.important.genes <- mat_exp_alz_genes.RF[,!colnames(mat_exp_alz_genes.RF) %in% important.genes.top$rowname]
top_genes <- important.genes.top$rowname
gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "GO",
multi_query = T,
highlight = T,
exclude_iea = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "GO",
highlight = T,
exclude_iea = T)
multi_gp$result <- multi_gp$result %>%
filter(highlighted == T)
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "GO 40 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "KEGG",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "KEGG")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(generationumerico))
enrichplot::dotplot(gp_mod_enrich, showCategory = 30, x = "Cluster", size = "GeneRatio", title = "KEGG 40 genes")gp_up_ordered = gost(list("Specific UMAP1+ genes" = important.genes.top$rowname,
"Matriz genes (No UMAP1+ genes)" = colnames(matriz.AD.minus.important.genes)) ,
organism = "hsapiens",
sources = "REAC",
multi_query = T,
highlight = T)
gostplot(gp_up_ordered, interactive = T, capped = T)top_genes_umap1pos.gprofiler = gconvert(important.genes.top$rowname)
top_genes_matriz.gprofiler = gconvert(colnames(matriz.AD.minus.important.genes))
# enrichment analysis using gene names
multi_gp = gost(list("Specific UMAP1+ genes" = top_genes_umap1pos.gprofiler$name,
"Matriz genes (No UMAP1+ genes)" = top_genes_matriz.gprofiler$name),
multi_query = FALSE,
evcodes = TRUE,
sources = "REAC")
# modify the g:Profiler data frame
gp_mod = multi_gp$result[,c("query", "source", "term_id",
"term_name", "p_value", "query_size",
"intersection_size", "term_size",
"effective_domain_size", "intersection")]
gp_mod$GeneRatio = paste0(gp_mod$intersection_size, "/", gp_mod$query_size)
gp_mod$BgRatio = paste0(gp_mod$term_size, "/", gp_mod$effective_domain_size)
names(gp_mod) = c("Cluster", "Category", "ID", "Description", "p.adjust",
"query_size", "Count", "term_size", "effective_domain_size",
"geneID", "GeneRatio", "BgRatio")
gp_mod$geneID = gsub(",", "/", gp_mod$geneID)
# define as enrichResult object
gp_mod_enrich = new("enrichResult", result = gp_mod)
gp_mod_enrich@result <- gp_mod_enrich@result %>%
mutate(generationumerico = Count / query_size) %>%
arrange(desc(Cluster))
enrichplot::dotplot(gp_mod_enrich, showCategory = 20, x = "Cluster", size = "GeneRatio", title = "REAC 40 genes")